package main

import (
	"database/sql"
	"fmt"
	_ "github.com/go-sql-driver/mysql" // 使用 _ 匿名加载 MySQL 驱动包
	"github.com/lisgroup/easyhttp"
	"log"
	"regexp"
	"strconv"
	"strings"
)

// Param is a map with string keys and string values.
// NOTE(review): nothing in this file references Param — confirm whether other
// files in the package rely on it before removing it.
type Param map[string]string

// SpiderJueJin downloads one page of the Douban movie Top 250 list, extracts
// the per-movie fields with a regular expression, hands the matches to
// insertMyDB and finally reports the page number on ch.
//
// index is the 1-based page number; every page covers 25 entries, so the
// request offset is (index-1)*25. Exactly one value (index) is sent on ch for
// every call that returns, including the early returns taken when the request
// fails or nothing matches, so a caller waiting for one message per page
// cannot block forever.
//
// NOTE(review): despite its name the function targets movie.douban.com; the
// name is kept because callers reference it.
func SpiderJueJin(index int, ch chan int) {
	// Report the page on every return path, not only on success.
	defer func() { ch <- index }()

	body := map[string]string{}
	urls := "https://movie.douban.com/top250?start=" + strconv.Itoa((index-1)*25) + "&filter="
	headers := map[string]string{"user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36"}
	// NOTE(review): the meaning of the trailing argument (4) is defined by
	// easyhttp.Request — confirm against that package.
	result, err := easyhttp.Request(urls, "GET", body, headers, 4)
	if err != nil {
		fmt.Println("HttpGet err: ", err)
		// Without a response there is nothing to parse or store.
		return
	}

	// Capture groups, in order: rank, image alt text, image URL, title,
	// "other" title, the two halves of the description paragraph split at
	// <br>, and the rating.
	regExp := `<div class="item">[\s\S]*?<div class="pic">[\s\S]*?<em class="">(.*?)<\/em>[\s\S]*?<a href=".*?">[\s\S]*?<img width=".*?" alt="(.*?)" src="(.*?)" class=".*?">[\s\S]*?div class="info[\s\S]*?class="hd"[\s\S]*?class="title">(.*?)<\/span>[\s\S]*?class="other">(.*?)<\/span>[\s\S]*?<div class="bd">[\s\S]*?<p class=".*?">([\s\S]*?)<br>([\s\S]*?)<\/p>[\s\S]*?span class="rating_num".*?average">(.*?)<\/span>`
	content := findByReg(regExp, result)
	if len(content) == 0 {
		// An INSERT without any value tuple is not valid SQL, so skip the
		// database step when the page produced no matches.
		fmt.Println("no movies matched on page", index)
		return
	}

	// Persist the matches.
	insertMyDB(content)
}

// findByReg compiles regExp and returns all successive matches found in
// result. Every inner slice holds the full match at index 0 followed by the
// text of each capture group. The return value is nil when nothing matches.
// An invalid pattern panics, because the pattern is compiled with
// regexp.MustCompile.
func findByReg(regExp, result string) [][]string {
	return regexp.MustCompile(regExp).FindAllStringSubmatch(result, -1)
}

// insertMyDB stores the scraped movies in the `top250` MySQL table using one
// multi-row INSERT statement.
//
// Every element of movies is expected to be one regexp submatch: index 0 is
// the full match (ignored) and indexes 1..8 are bound, in order, to the
// columns id, title, image, subtitle, other, personnel, info and score. Rows
// of any other length are logged and skipped so the number of placeholders
// always equals the number of bound arguments. When no usable row remains the
// function returns without opening a database connection.
//
// Any database error terminates the program through log.Fatal, as before.
//
// Expected table layout:
//
//	CREATE TABLE `top250` (
//	  `id` int(20) NOT NULL AUTO_INCREMENT,
//	  `title` varchar(20) DEFAULT '',
//	  `image` varchar(100) DEFAULT '',
//	  `subtitle` varchar(255) DEFAULT '',
//	  `other` varchar(255) DEFAULT NULL,
//	  `personnel` varchar(255) DEFAULT '',
//	  `info` varchar(255) DEFAULT '',
//	  `score` varchar(10) DEFAULT '',
//	  PRIMARY KEY (`id`)
//	) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4;
func insertMyDB(movies [][]string) {
	// The work happens in a helper that returns errors, so its deferred
	// Close calls run before log.Fatal exits the process.
	if err := insertTop250Rows(movies); err != nil {
		log.Fatal(err)
	}
}

// insertTop250Rows performs the insert described on insertMyDB and returns
// the first error it meets instead of exiting.
func insertTop250Rows(movies [][]string) error {
	// Number of columns named in the INSERT statement below.
	const columns = 8

	// One "(?, ...)" tuple per row.
	valueStrings := make([]string, 0, len(movies))
	// The flattened values bound to those placeholders.
	valueArgs := make([]interface{}, 0, len(movies)*columns)
	for i, val := range movies {
		// val[0] is the full match, so a usable row has columns+1 entries.
		if len(val) != columns+1 {
			log.Printf("skipping row %d: expected %d fields, got %d", i, columns+1, len(val))
			continue
		}
		valueStrings = append(valueStrings, "(?, ?, ?, ?, ?, ?, ?, ?)")
		for _, field := range val[1:] {
			valueArgs = append(valueArgs, field)
		}
	}
	if len(valueStrings) == 0 {
		// "INSERT ... VALUES" with no tuple is a syntax error; nothing to do.
		return nil
	}

	// NOTE(review): the DSN, including credentials, is hard-coded — consider
	// moving it to configuration.
	db, err := sql.Open("mysql",
		"root:root@tcp(127.0.0.1:3306)/test")
	if err != nil {
		return fmt.Errorf("opening database: %w", err)
	}
	defer db.Close()

	// Only the fixed placeholder tuples are concatenated into the statement;
	// the scraped values themselves are passed as bound arguments.
	sqlName := fmt.Sprintf("INSERT INTO `top250` (id,title,image,subtitle,other,personnel,info,score) VALUES %s",
		strings.Join(valueStrings, ","))
	fmt.Println(sqlName)

	stmt, err := db.Prepare(sqlName)
	if err != nil {
		return fmt.Errorf("preparing insert: %w", err)
	}
	defer stmt.Close()

	fmt.Println(valueArgs)
	res, err := stmt.Exec(valueArgs...)
	if err != nil {
		return fmt.Errorf("executing insert: %w", err)
	}
	lastID, err := res.LastInsertId()
	if err != nil {
		return fmt.Errorf("reading last insert id: %w", err)
	}
	rowCnt, err := res.RowsAffected()
	if err != nil {
		return fmt.Errorf("reading affected rows: %w", err)
	}
	log.Printf("ID = %d, affected = %d\n", lastID, rowCnt)
	return nil
}

// main reads a 1-based start page and end page from standard input, starts
// one SpiderJueJin goroutine per page in [start, end] and then waits for one
// completion message per page, printing each page number as it arrives.
//
// The program exits with an error when either number cannot be read or when
// the range does not satisfy 1 <= start <= end, which is the contract stated
// in the prompts.
func main() {
	var start, end int
	fmt.Println("请输入爬取的起始页（>=1）: ...")
	if _, err := fmt.Scan(&start); err != nil {
		log.Fatalf("读取起始页失败: %v", err)
	}
	fmt.Println("请输入爬取的结束页(>=start): ...")
	if _, err := fmt.Scan(&end); err != nil {
		log.Fatalf("读取结束页失败: %v", err)
	}
	// A start below 1 would produce a negative offset in the request URL and
	// end < start would silently do nothing, so reject both up front.
	if start < 1 || end < start {
		log.Fatalf("页码范围无效: start=%d, end=%d (要求 1 <= start <= end)", start, end)
	}
	fmt.Println(start, end)

	// Unbuffered: each worker blocks on its send until main receives it.
	channel := make(chan int)
	for i := start; i <= end; i++ {
		go SpiderJueJin(i, channel)
	}
	// Receive exactly as many messages as workers were started.
	for i := start; i <= end; i++ {
		fmt.Println("第" + strconv.Itoa(<-channel) + "页任务完成")
	}
}
